# Conservative estimate: for every year/group row, average the three aggregated
# source columns (state_prev, ai_prev, hrw_prev), skipping missing values.
aggdata_conservative$prevalenceMeans <- rowMeans(
  aggdata_conservative[c("state_prev", "ai_prev", "hrw_prev")],
  na.rm = TRUE
)

# Plotting
library(ggplot2)
# library() stops with an error when gridExtra is not installed; require()
# would only return FALSE and let grid.arrange() fail later on.
library(gridExtra)

# Optimistic counting: highest reported prevalence, one coloured line per group.
plot1 <- ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)

# Conservative counting: mean over the three sources, one coloured line per group.
plot2 <- ggplot(aggdata_conservative, aes(x = time, y = prevalenceMeans, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)

# Stack both plots vertically.
grid.arrange(plot1, plot2, nrow = 2)
# Base-graphics alternative (disabled):
# plot(aggdata$time, aggdata$prevalenceMeans, pch=aggdata$grp, type="b")

# Inspect the aggregated tables and the raw data in the data viewer.
View(aggdata_optimistic)
View(aggdata_conservative)
View(data)
# Recompute both aggregations directly on `data`.
prev_cols <- c("state_prev", "ai_prev", "hrw_prev")

# Optimistic method: per row, keep the highest value reported by any source.
# pmax(na.rm = TRUE) ignores single NAs and returns NA when all three sources
# are missing, so no -Inf values (and no max() warnings) have to be cleaned up.
data$highestPrevalence <- do.call(
  pmax,
  c(unname(as.list(data[prev_cols])), list(na.rm = TRUE))
)
# Mean of the row-wise maxima per year and simplified actor group.
aggdata_optimistic <- aggregate(
  data["highestPrevalence"],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)

### Conservative method (mean of all three prevalence sources)
# First the per-source mean for every year/group, then the mean across sources.
aggdata_conservative <- aggregate(
  data[prev_cols],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)
aggdata_conservative$prevalenceMeans <- rowMeans(aggdata_conservative[prev_cols], na.rm = TRUE)

# Inspect both results in the data viewer.
View(aggdata_optimistic)
View(aggdata_conservative)
# Import the interactively chosen CSV. Cells containing "-99", nothing, or a
# single blank are read as NA. (A `NULL` inside c() is silently dropped, so it
# is left out; the resulting na.strings vector is unchanged.)
data <- read.csv(
  file.choose(),
  na.strings = c("-99", "", " "),
  fileEncoding = "utf8"
)

# Small data set with only the variables of interest (not strictly necessary).
tsdf <- data[c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form")]
# Alternative (disabled): keep only cases that explicitly mention "rape".
#tsdf <- subset(data, select=c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form"), subset=(form=="rape"))

# Missing values: already declared at import via na.strings, so a manual recode
# such as `tsdf$state_prev[tsdf$state_prev == -99] <- NA` is not needed for
# state_prev, ai_prev, hrw_prev or form. Functions like mean() still need
# na.rm = TRUE to exclude the NAs from a calculation.

# Per-case means of the three prevalence variables (disabled; when aggregating,
# see below, the mean should only be formed at a later stage). The first two
# lines are equivalent as long as columns 2:4 are the prevalence variables.
#tsdf$rowMeans <- rowMeans(tsdf[,2:4], na.rm=TRUE)
#tsdf$prevalenceMeans <- rowMeans(tsdf[c("state_prev", "ai_prev", "hrw_prev")], na.rm=TRUE)
#tsdf$prevalenceRounded <- round(tsdf$prevalenceMeans)

# Recode actor types: state actors (1, 2, 4) are merged; 3 = rebels,
# 6 = pro-government militias. The variable is initialised with NA first;
# %in% never yields NA, so rows with a missing actor_type keep that NA.
tsdf$groups_simplified <- NA_character_
tsdf$groups_simplified[tsdf$actor_type %in% c(1, 2, 4)] <- "State Actor"
tsdf$groups_simplified[tsdf$actor_type %in% 3] <- "Non-State Actor"
tsdf$groups_simplified[tsdf$actor_type %in% 6] <- "Government-supporting Militia"

## DATA BY YEAR
prev_cols <- c("state_prev", "ai_prev", "hrw_prev")

### Optimistic method (the highest value is kept in every case)
# New variable holding the highest value out of SD, AI and HRW per row.
# max(na.rm = TRUE) returns -Inf for rows where every source is NA, while
# na.rm = FALSE turns the result into NA as soon as one source is NA, even if
# another source reports e.g. 3 (high prevalence). pmax(na.rm = TRUE) avoids
# both: single NAs are skipped and all-NA rows stay NA.
tsdf$highestPrevalence <- do.call(
  pmax,
  c(unname(as.list(tsdf[prev_cols])), list(na.rm = TRUE))
)
# Aggregate by year for every recoded actor type ($groups_simplified).
aggdata_optimistic <- aggregate(
  tsdf["highestPrevalence"],
  list(time = tsdf$year, grp = tsdf$groups_simplified),
  mean,
  na.rm = TRUE
)

### Conservative method (mean of all three prevalence sources)
# Aggregate by year for every recoded actor type ($groups_simplified) ...
aggdata_conservative <- aggregate(
  tsdf[prev_cols],
  list(time = tsdf$year, grp = tsdf$groups_simplified),
  mean,
  na.rm = TRUE
)
# ... then take the mean of the three per-source means.
aggdata_conservative$prevalenceMeans <- rowMeans(aggdata_conservative[prev_cols], na.rm = TRUE)

# Plotting
library(ggplot2)
library(gridExtra) # fails loudly if missing, unlike require()
# No par(mfrow = ...) here: it only affects base graphics, not ggplot output.
# The two-row layout is produced by grid.arrange() below.

# Optimistic counting
plot1 <- ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
# Conservative counting
plot2 <- ggplot(aggdata_conservative, aes(x = time, y = prevalenceMeans, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
grid.arrange(plot1, plot2, nrow = 2)
# Base-graphics alternative (disabled):
#plot(aggdata$time, aggdata$prevalenceMeans, pch=aggdata$grp, type="b")
# Early exploration of the raw dataset.
data <- read.csv("M:/Desktop/MA/SVACDataset.csv")
# Columns are addressed as data$... instead of using attach(data): attach()
# puts a copy of the columns on the search path, and that copy is not updated
# when `data` is modified afterwards (e.g. by the NA recode below).
subset(data, select = 2)
plot(data$year, data$state_prev)

# Recode the missing-value code -99 to NA in state_prev.
data$state_prev[data$state_prev == -99] <- NA
# Keep only complete rows of year and the three prevalence sources for plotting.
plotData <- na.omit(subset(data, select = c(year, state_prev, ai_prev, hrw_prev)))
plot(plotData$year, plotData$state_prev)
# NOTE(review): `data$actor` relies on `$` partial matching unless a column
# named exactly "actor" exists; confirm the intended sort column.
data <- data[order(data$year, data$actor), ]

# The dataset reports prevalence separately per source, so a cumulative
# variable has to be computed by hand. Proposal: take the mean of all sources
# and round it (a rather conservative variant?).
round(mean(c(1, 2, 3)))
# To apply this to every row, rowMeans() over the source columns does the job;
# no lapply is needed.

# Useful statistics
# Which values does a variable take?
unique(data$year)
# How many distinct values does a variable take?
# (The closing parenthesis was missing, which made this line a syntax error.)
length(unique(data$year))
# Correlations between parts of the data set.
# NOTE(review): `aggdata` is not created anywhere in the visible script and the
# group codes 1 and 3 are assumed to exist in it; verify before running.
cor(
  x = subset(aggdata, grp == 1, select = highestPrevalence),
  y = subset(aggdata, grp == 3, select = highestPrevalence)
)
# The actual analysis starts here.
# Import the interactively chosen CSV (file encoding is set via fileEncoding).
# Cells containing "-99", nothing, or a single blank are read as NA. A `NULL`
# inside c() is silently dropped, so it is left out; na.strings is unchanged.
data <- read.csv(
  file.choose(),
  na.strings = c("-99", "", " "),
  fileEncoding = "utf8"
)

# Small data set with only the variables of interest (not strictly necessary).
tsdf <- data[c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form")]
# Alternative (disabled): keep only cases that explicitly mention "rape".
#tsdf <- subset(data, select=c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form"), subset=(form=="rape"))

# Missing values: already declared at import via na.strings, so a manual recode
# such as `tsdf$state_prev[tsdf$state_prev == -99] <- NA` is not needed for
# state_prev, ai_prev, hrw_prev or form. Functions like mean() still need
# na.rm = TRUE to exclude the NAs from a calculation.

# Per-case means of the three prevalence variables (disabled; when aggregating,
# see below, the mean should only be formed at a later stage). The first two
# lines are equivalent as long as columns 2:4 are the prevalence variables.
#tsdf$rowMeans <- rowMeans(tsdf[,2:4], na.rm=TRUE)
#tsdf$prevalenceMeans <- rowMeans(tsdf[c("state_prev", "ai_prev", "hrw_prev")], na.rm=TRUE)
#tsdf$prevalenceRounded <- round(tsdf$prevalenceMeans)

# Recode actor types: state actors (1, 2, 4) are merged; 3 = rebels,
# 6 = pro-government militias. The variable is initialised with NA first;
# %in% never yields NA, so rows with a missing actor_type keep that NA.
tsdf$groups_simplified <- NA_character_
tsdf$groups_simplified[tsdf$actor_type %in% c(1, 2, 4)] <- "State Actor"
tsdf$groups_simplified[tsdf$actor_type %in% 3] <- "Non-State Actor"
tsdf$groups_simplified[tsdf$actor_type %in% 6] <- "Government-supporting Militia"

## DATA BY YEAR
prev_cols <- c("state_prev", "ai_prev", "hrw_prev")

### Optimistic method (the highest value is kept in every case)
# New variable holding the highest value out of SD, AI and HRW per row.
# max(na.rm = TRUE) returns -Inf for rows where every source is NA, while
# na.rm = FALSE turns the result into NA as soon as one source is NA, even if
# another source reports e.g. 3 (high prevalence). pmax(na.rm = TRUE) avoids
# both: single NAs are skipped and all-NA rows stay NA.
tsdf$highestPrevalence <- do.call(
  pmax,
  c(unname(as.list(tsdf[prev_cols])), list(na.rm = TRUE))
)
# Aggregate by year for every recoded actor type ($groups_simplified).
aggdata_optimistic <- aggregate(
  tsdf["highestPrevalence"],
  list(time = tsdf$year, grp = tsdf$groups_simplified),
  mean,
  na.rm = TRUE
)

### Conservative method (mean of all three prevalence sources)
# Aggregate by year for every recoded actor type ($groups_simplified) ...
aggdata_conservative <- aggregate(
  tsdf[prev_cols],
  list(time = tsdf$year, grp = tsdf$groups_simplified),
  mean,
  na.rm = TRUE
)
# ... then take the mean of the three per-source means.
aggdata_conservative$prevalenceMeans <- rowMeans(aggdata_conservative[prev_cols], na.rm = TRUE)

# Plotting
library(ggplot2)
library(gridExtra) # fails loudly if missing, unlike require()
# No par(mfrow = ...) here: it only affects base graphics, not ggplot output.
# The two-row layout is produced by grid.arrange() below.

# Optimistic counting
plot1 <- ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
# Conservative counting
plot2 <- ggplot(aggdata_conservative, aes(x = time, y = prevalenceMeans, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
grid.arrange(plot1, plot2, nrow = 2)
# Base-graphics alternative (disabled):
#plot(aggdata$time, aggdata$prevalenceMeans, pch=aggdata$grp, type="b")
# Import the interactively chosen CSV (file encoding is set via fileEncoding).
# Cells containing "-99", nothing, or a single blank are read as NA. A `NULL`
# inside c() is silently dropped, so it is left out; na.strings is unchanged.
data <- read.csv(
  file.choose(),
  na.strings = c("-99", "", " "),
  fileEncoding = "utf8"
)

# Small data set with only the variables of interest (not strictly necessary).
tsdf <- data[c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form")]
# Alternative (disabled): keep only cases that explicitly mention "rape".
#tsdf <- subset(data, select=c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form"), subset=(form=="rape"))

# Missing values: already declared at import via na.strings, so a manual recode
# such as `tsdf$state_prev[tsdf$state_prev == -99] <- NA` is not needed for
# state_prev, ai_prev, hrw_prev or form. Functions like mean() still need
# na.rm = TRUE to exclude the NAs from a calculation.

# Per-case means of the three prevalence variables (disabled; when aggregating,
# see below, the mean should only be formed at a later stage). The first two
# lines are equivalent as long as columns 2:4 are the prevalence variables.
#tsdf$rowMeans <- rowMeans(tsdf[,2:4], na.rm=TRUE)
#tsdf$prevalenceMeans <- rowMeans(tsdf[c("state_prev", "ai_prev", "hrw_prev")], na.rm=TRUE)
#tsdf$prevalenceRounded <- round(tsdf$prevalenceMeans)

# Recode actor types: state actors (1, 2, 4) are merged; 3 = rebels,
# 6 = pro-government militias. The variable is initialised with NA first;
# %in% never yields NA, so rows with a missing actor_type keep that NA.
tsdf$groups_simplified <- NA_character_
tsdf$groups_simplified[tsdf$actor_type %in% c(1, 2, 4)] <- "State Actor"
tsdf$groups_simplified[tsdf$actor_type %in% 3] <- "Non-State Actor"
tsdf$groups_simplified[tsdf$actor_type %in% 6] <- "Government-supporting Militia"

## DATA BY YEAR
prev_cols <- c("state_prev", "ai_prev", "hrw_prev")

### Optimistic method (the highest value is kept in every case)
# New variable holding the highest value out of SD, AI and HRW per row.
# max(na.rm = TRUE) returns -Inf for rows where every source is NA, while
# na.rm = FALSE turns the result into NA as soon as one source is NA, even if
# another source reports e.g. 3 (high prevalence). pmax(na.rm = TRUE) avoids
# both: single NAs are skipped and all-NA rows stay NA.
tsdf$highestPrevalence <- do.call(
  pmax,
  c(unname(as.list(tsdf[prev_cols])), list(na.rm = TRUE))
)
# Aggregate by year for every recoded actor type ($groups_simplified).
aggdata_optimistic <- aggregate(
  tsdf["highestPrevalence"],
  list(time = tsdf$year, grp = tsdf$groups_simplified),
  mean,
  na.rm = TRUE
)

### Conservative method (mean of all three prevalence sources)
# Aggregate by year for every recoded actor type ($groups_simplified) ...
aggdata_conservative <- aggregate(
  tsdf[prev_cols],
  list(time = tsdf$year, grp = tsdf$groups_simplified),
  mean,
  na.rm = TRUE
)
# ... then take the mean of the three per-source means.
aggdata_conservative$prevalenceMeans <- rowMeans(aggdata_conservative[prev_cols], na.rm = TRUE)

# Plotting
library(ggplot2)
library(gridExtra) # fails loudly if missing, unlike require()

# Optimistic counting
plot1 <- ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
# Conservative counting
plot2 <- ggplot(aggdata_conservative, aes(x = time, y = prevalenceMeans, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
grid.arrange(plot1, plot2, nrow = 2)
# Base-graphics alternative (disabled):
#plot(aggdata$time, aggdata$prevalenceMeans, pch=aggdata$grp, type="b")

# Inspect the working subset (opened twice in the session) and the raw import.
View(tsdf)
View(tsdf)
View(data)
# Repeated interactive imports; each call opens a file dialog and overwrites
# `data`, so only the last chosen file is kept. The na.strings vector marks
# -99 and empty cells as NA (the NULL entry is dropped by c()); the third and
# fourth call additionally treat a single blank as NA.
data <- read.csv(file = file.choose(), fileEncoding = "utf8", na.strings = c(-99, NULL, ""))
data <- read.csv(file = file.choose(), fileEncoding = "utf8", na.strings = c(-99, NULL, ""))
data <- read.csv(file = file.choose(), fileEncoding = "utf8", na.strings = c(-99, NULL, "", " "))
data <- read.csv(file = file.choose(), fileEncoding = "utf8", na.strings = c(-99, NULL, "", " "))
data <- read.csv(file = file.choose(), fileEncoding = "utf8", na.strings = c(-99, NULL, ""))
data <- read.csv(file = file.choose(), fileEncoding = "utf8", na.strings = c(-99, NULL, ""))
data <- read.csv(file = file.choose(), fileEncoding = "utf8", na.strings = c(-99, NULL, ""))
# Import the interactively chosen CSV (file encoding is set via fileEncoding).
# "-99" and empty cells are read as NA. A `NULL` inside c() is silently
# dropped, so it is left out; the na.strings vector is unchanged.
data <- read.csv(file.choose(), na.strings = c("-99", ""), fileEncoding = "utf8")
# Building a reduced data set is disabled; the full `data` is used directly.
#tsdf <- data[c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form")]

# Recode actor types: state actors (1, 2, 4) are merged; 3 = rebels,
# 6 = pro-government militias. The variable is initialised with NA first;
# %in% never yields NA, so rows with a missing actor_type keep that NA.
data$groups_simplified <- NA_character_
data$groups_simplified[data$actor_type %in% c(1, 2, 4)] <- "State Actor"
data$groups_simplified[data$actor_type %in% 3] <- "Non-State Actor"
data$groups_simplified[data$actor_type %in% 6] <- "Government-supporting Militia"

# Aggregation by year
prev_cols <- c("state_prev", "ai_prev", "hrw_prev")

### Optimistic method (the highest value is kept in every case)
# New variable holding the highest value out of SD, AI and HRW per row.
# pmax(na.rm = TRUE) skips single NAs and returns NA when all three sources
# are missing, so no -Inf values have to be replaced afterwards.
data$highestPrevalence <- do.call(
  pmax,
  c(unname(as.list(data[prev_cols])), list(na.rm = TRUE))
)
aggdata_optimistic <- aggregate(
  data["highestPrevalence"],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)

### Conservative method (mean of all three prevalence sources)
# Per-source means for every year/group, then the mean across the sources.
aggdata_conservative <- aggregate(
  data[prev_cols],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)
aggdata_conservative$prevalenceMeans <- rowMeans(aggdata_conservative[prev_cols], na.rm = TRUE)

# Plotting
library(ggplot2)
library(gridExtra) # fails loudly if missing, unlike require()

# Optimistic counting
plot1 <- ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
# Conservative counting
plot2 <- ggplot(aggdata_conservative, aes(x = time, y = prevalenceMeans, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
grid.arrange(plot1, plot2, nrow = 2)
# Base-graphics alternative (disabled):
#plot(aggdata$time, aggdata$prevalenceMeans, pch=aggdata$grp, type="b")
# Import the interactively chosen CSV (file encoding is set via fileEncoding).
# "-99" and empty cells are read as NA. A `NULL` inside c() is silently
# dropped, so it is left out; the na.strings vector is unchanged.
data <- read.csv(file.choose(), na.strings = c("-99", ""), fileEncoding = "utf8")
# Building a reduced data set is disabled; the full `data` is used directly.
#tsdf <- data[c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form")]

# Recode actor types: state actors (1, 2, 4) are merged; 3 = rebels,
# 6 = pro-government militias. The variable is initialised with NA first;
# %in% never yields NA, so rows with a missing actor_type keep that NA.
data$groups_simplified <- NA_character_
data$groups_simplified[data$actor_type %in% c(1, 2, 4)] <- "State Actor"
data$groups_simplified[data$actor_type %in% 3] <- "Non-State Actor"
data$groups_simplified[data$actor_type %in% 6] <- "Government-supporting Militia"

# Aggregation by year
prev_cols <- c("state_prev", "ai_prev", "hrw_prev")

### Optimistic method (the highest value is kept in every case)
# New variable holding the highest value out of SD, AI and HRW per row.
# pmax(na.rm = TRUE) skips single NAs and returns NA when all three sources
# are missing, so no -Inf values have to be replaced afterwards.
data$highestPrevalence <- do.call(
  pmax,
  c(unname(as.list(data[prev_cols])), list(na.rm = TRUE))
)
aggdata_optimistic <- aggregate(
  data["highestPrevalence"],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)

### Conservative method (mean of all three prevalence sources)
# Per-source means for every year/group, then the mean across the sources.
aggdata_conservative <- aggregate(
  data[prev_cols],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)
aggdata_conservative$prevalenceMeans <- rowMeans(aggdata_conservative[prev_cols], na.rm = TRUE)

# Plotting
library(ggplot2)
library(gridExtra) # fails loudly if missing, unlike require()

# Optimistic counting
plot1 <- ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
# Conservative counting
plot2 <- ggplot(aggdata_conservative, aes(x = time, y = prevalenceMeans, colour = grp)) +
  geom_line(aes(group = grp)) +
  geom_point(size = 3)
grid.arrange(plot1, plot2, nrow = 2)
# Base-graphics alternative (disabled):
#plot(aggdata$time, aggdata$prevalenceMeans, pch=aggdata$grp, type="b")
# Repeated import attempts; every call opens a file dialog and overwrites
# `data`. "-99" and empty cells become NA (a `NULL` inside c() is dropped
# silently, so it is omitted without changing the na.strings vector).
data <- read.csv(file.choose(), na.strings = c("-99", ""), fileEncoding = "utf8")
data <- read.csv(file.choose(), na.strings = c("-99", ""), fileEncoding = "utf8")
data <- read.csv(file.choose(), na.strings = c("-99", ""), fileEncoding = "utf8")
data <- read.csv(file.choose(), na.strings = c("-99", ""), fileEncoding = "utf8")
# Look up the import options.
?read.csv
# header = TRUE is already the read.csv() default; spelled out as TRUE because
# `T` is an ordinary variable that can be reassigned.
data <- read.csv(file.choose(), na.strings = c("-99", ""), fileEncoding = "utf8", header = TRUE)
# Import the interactively chosen, semicolon-separated CSV (file encoding is
# set via fileEncoding). "-99" and empty cells are read as NA. A `NULL` inside
# c() is silently dropped, so it is left out; na.strings is unchanged.
data <- read.csv(
  file.choose(),
  na.strings = c("-99", ""),
  sep = ";",
  fileEncoding = "utf8"
)
# Building a reduced data set is disabled; the full `data` is used directly.
#tsdf <- data[c("year", "state_prev", "ai_prev", "hrw_prev", "actor_type", "form")]

# Recode actor types: state actors (1, 2, 4) are merged; 3 = rebels,
# 6 = pro-government militias. The variable is initialised with NA first;
# %in% never yields NA, so rows with a missing actor_type keep that NA.
data$groups_simplified <- NA_character_
data$groups_simplified[data$actor_type %in% c(1, 2, 4)] <- "State Actor"
data$groups_simplified[data$actor_type %in% 3] <- "Non-State Actor"
data$groups_simplified[data$actor_type %in% 6] <- "Government-supporting Militia"

# Aggregation by year
prev_cols <- c("state_prev", "ai_prev", "hrw_prev")

### Optimistic method (the highest value is kept in every case)
# Optional filter (disabled): keep only rows where rape is mentioned.
#data <- subset(data, form=="rape")
# New variable holding the highest value out of SD, AI and HRW per row.
# pmax(na.rm = TRUE) skips single NAs and returns NA when all three sources
# are missing, so no -Inf values have to be replaced afterwards.
data$highestPrevalence <- do.call(
  pmax,
  c(unname(as.list(data[prev_cols])), list(na.rm = TRUE))
)
aggdata_optimistic <- aggregate(
  data["highestPrevalence"],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)

### Conservative method (mean of all three prevalence sources)
# Per-source means for every year/group, then the mean across the sources.
aggdata_conservative <- aggregate(
  data[prev_cols],
  list(time = data$year, grp = data$groups_simplified),
  mean,
  na.rm = TRUE
)
aggdata_conservative$prevalenceMeans <- rowMeans(aggdata_conservative[prev_cols], na.rm = TRUE)

# Plotting
library(ggplot2)
library(gridExtra) # fails loudly if missing, unlike require()
?geom_line
# Optimistic series: colour and line type both distinguish the actor groups.
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp)) +
  geom_point(size = 3)
# Help lookups made while experimenting with the plot appearance.
?invert
??invert
??order

# Black-and-white variants: groups are told apart by line type only; the
# point size is varied (3, 1, default).
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence)) +
  geom_line(aes(group = grp, linetype = grp)) +
  geom_point(size = 3)
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence)) +
  geom_line(aes(group = grp, linetype = grp)) +
  geom_point(size = 1)
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence)) +
  geom_line(aes(group = grp, linetype = grp)) +
  geom_point()

# Coloured variants without points.
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp))
# Line width experiments. A constant is set outside aes(): inside aes() it is
# treated as a data mapping, which adds a "size" legend and does not apply the
# literal width.
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp), size = 2)
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp), size = 1)
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp), size = 0.5)
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp))
?geom_line
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp), size = 1)

# The same line charts via the qplot() shortcut.
qplot(time, highestPrevalence, data = aggdata_optimistic, group = grp, geom = "line")
qplot(time, highestPrevalence, data = aggdata_optimistic, group = grp, geom = "line", colour = grp, linetype = grp)

# Coloured lines with default-sized points.
ggplot(aggdata_optimistic, aes(x = time, y = highestPrevalence, colour = grp)) +
  geom_line(aes(group = grp, linetype = grp)) +
  geom_point()
## Both series in one graph
# Solid line: freq per year; dotted line: freq as a percentage of total.
# Axis titles are blanked. The matching geom_point() layers and the percentage
# text labels are disabled.
# NOTE(review): `tmp1` must already exist when this runs; it is not created in
# the lines directly above.
ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  # geom_point(aes(x=year, y=freq)) +
  geom_line(
    aes(x = year, y = (freq / total) * 100, group = 1),
    linetype = "dotted"
  ) +
  # geom_point(aes(x=year, y=(freq/total)*100)) +
  labs("", x = "", y = "")
# + geom_text(aes(x=year, y=((freq/total)*100)-1, label=round((freq/total)*100)))
# Global trends in sexual violence reporting 1989-2009
library(ggplot2)
library(gridExtra)
library(plyr)

# Import the interactively chosen, semicolon-separated CSV. "-99" and empty
# cells are read as NA (a `NULL` inside c() is dropped silently, so it is
# omitted without changing the na.strings vector).
data <- read.csv(
  file.choose(),
  na.strings = c("-99", ""),
  sep = ";",
  fileEncoding = "utf8"
)

# Mean state_prev level per year over all rows.
level.year.all <- aggregate(data["state_prev"], list(year = data$year), mean, na.rm = TRUE)
qplot(data = level.year.all, x = year, y = state_prev, geom = "line", ylab = "Berichtetes Vorkommen", xlab = "", ylim = c(0, 3))

# Mean state_prev level per year over rows with a non-zero level only.
data.only.positive.cases <- subset(data, state_prev != 0, select = c(year, state_prev))
level.year.positive.only <- aggregate(
  data.only.positive.cases["state_prev"],
  list(year = data.only.positive.cases$year),
  mean,
  na.rm = TRUE
)
qplot(data = level.year.positive.only, x = year, y = state_prev, geom = "line", ylab = "Berichtetes Vorkommen", xlab = "", ylim = c(0, 3))

# Count, per year, all listed conflicts whose sexual-violence level was at
# least 1 (non-missing and non-zero state_prev).
tmp1 <- count(data[!is.na(data$state_prev) & data$state_prev != 0, ], c("year"))
# Total number of conflicts per year (non-missing state_prev).
total <- count(data[!is.na(data$state_prev), ], c("year"))
# Attach the totals by year rather than by row position: a year whose rows are
# all zero appears in `total` but not in `tmp1`, which would otherwise shift
# or break the alignment.
tmp1$total <- total$freq[match(tmp1$year, total$year)]

# Absolute frequency per year.
plot1 <- ggplot(tmp1, aes(x = year, y = freq, group = 1)) +
  theme_bw() +
  geom_line() +
  geom_point()
# Relative frequency per year in percent.
plot2 <- ggplot(tmp1, aes(x = year, y = (freq / total) * 100, group = 1)) +
  theme_bw() +
  geom_line() +
  geom_point()
grid.arrange(plot1, plot2, nrow = 2) # place both plots one below the other

# Both series in one graph: solid = absolute, dotted = relative (in %).
# The geom_point() layers and the percentage text labels are disabled.
ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(aes(x = year, y = (freq / total) * 100, group = 1), linetype = "dotted") +
  labs("", x = "", y = "")
# + geom_text(aes(x=year, y=((freq/total)*100)-1, label=round((freq/total)*100)))
# Combined trend plot: solid line = freq per year, dotted line = freq as a
# percentage of total. Axis titles are blanked.
trend_base <- ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(aes(x = year, y = (freq / total) * 100, group = 1), linetype = "dotted") +
  labs("", x = "", y = "")

# Text labels for both lines. The x position is given as a plain year number:
# the year column is numeric, whereas as.Date("2010-01-01") is a day count
# since 1970-01-01 (14610) and as.integer("2010-01-01") is NA, so neither lands
# at the year 2010 on this axis.
trend_labelled <- trend_base +
  annotate(geom = "text", x = 2010, y = 10, label = "Relative Häufigkeit (in %)", size = 3) +
  annotate(geom = "text", x = 2010, y = 30, label = "Absolute Häufigkeit", size = 3)
trend_labelled

# Unlabelled version.
trend_base

# Check how the plotted columns are stored.
class(tmp1$year)
class(tmp1$freq)

# Labelled version again (drawn once per attempt in the session).
trend_labelled
trend_labelled

# Converting the year to a Date. as.Date() on a bare number needs an origin and
# would interpret the value as a number of days, not as a year, so a full date
# string is built instead. The result is kept in a separate vector so that
# tmp1$year stays numeric for the numeric annotation positions used here.
year_as_date <- as.Date(paste0(tmp1$year, "-01-01"))
class(year_as_date)
?as.Date

trend_labelled
# Combined trend plot: solid line = freq per year, dotted line = freq as a
# percentage of total. Axis titles are blanked.
trend_base <- ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(aes(x = year, y = (freq / total) * 100, group = 1), linetype = "dotted") +
  labs("", x = "", y = "")

# Unlabelled version.
trend_base

# Label for the relative series only.
trend_base +
  annotate(geom = "text", x = 2010, y = 10, label = "Relative Häufigkeit (in %)", size = 3)
View(tmp1)

# Labels for both series at x = 2010.
trend_base +
  annotate(geom = "text", x = 2010, y = 10, label = "Relative Häufigkeit (in %)", size = 3) +
  annotate(geom = "text", x = 2010, y = 30, label = "Absolute Häufigkeit", size = 3)

# Labels moved to x = 2009 and wrapped over several lines.
trend_wrapped <- trend_base +
  annotate(geom = "text", x = 2009, y = 10, label = "Relative\nHäufigkeit\n(in %)", size = 3) +
  annotate(geom = "text", x = 2009, y = 28, label = "Absolute\nHäufigkeit", size = 3)

# Linear trend line for the absolute series. The layer is joined to the plot
# with `+` (a layer on a line of its own is never added to the plot) and gets
# explicit x/y aesthetics, because ggplot(tmp1) defines no plot-level mapping
# that geom_smooth() could inherit. Drawn twice, once per attempt in the
# session.
trend_wrapped +
  geom_smooth(method = "lm", aes(x = year, y = freq))
trend_wrapped +
  geom_smooth(method = "lm", aes(x = year, y = freq))
# Combined trend plot with wrapped text labels at x = 2009: solid line = freq
# per year, dotted line = freq as a percentage of total. Axis titles are
# blanked.
trend_wrapped <- ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(aes(x = year, y = (freq / total) * 100, group = 1), linetype = "dotted") +
  labs("", x = "", y = "") +
  annotate(geom = "text", x = 2009, y = 10, label = "Relative\nHäufigkeit\n(in %)", size = 3) +
  annotate(geom = "text", x = 2009, y = 28, label = "Absolute\nHäufigkeit", size = 3)

# Linear trend for the absolute series only.
trend_wrapped +
  geom_smooth(method = "lm", aes(x = year, y = freq))

# Linear trends for both the absolute and the relative series.
trend_both <- trend_wrapped +
  geom_smooth(method = "lm", aes(x = year, y = freq)) +
  geom_smooth(method = "lm", aes(x = year, y = (freq / total) * 100))
trend_both

# Same plot with the rounded percentage printed just below the dotted line.
# The text layer is joined with `+`; on a line of its own it would be
# evaluated separately and never be added to the plot.
# NOTE(review): if the label layer was meant to stay disabled, drop this call.
trend_both +
  geom_text(aes(x = year, y = ((freq / total) * 100) - 1, label = round((freq / total) * 100)))
# Five variants of the combined trend plot (solid = freq per year, dotted =
# freq as a percentage of total, blank axis titles). They differ only in the
# vertical position of the two text labels at x = 2009. The geom_point()
# layers and the linear trend lines
#   geom_smooth(method="lm", aes(x=year, y=freq)) +
#   geom_smooth(method="lm", aes(x=year, y=(freq/total)*100))
# are disabled in all of them.

# Labels at y = 10 / 28
ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(
    aes(x = year, y = (freq / total) * 100, group = 1),
    linetype = "dotted"
  ) +
  labs("", x = "", y = "") +
  annotate(geom = "text", x = 2009, y = 10, label = "Relative\nHäufigkeit\n(in %)", size = 3) +
  annotate(geom = "text", x = 2009, y = 28, label = "Absolute\nHäufigkeit", size = 3)

# Labels at y = 10 / 29
ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(
    aes(x = year, y = (freq / total) * 100, group = 1),
    linetype = "dotted"
  ) +
  labs("", x = "", y = "") +
  annotate(geom = "text", x = 2009, y = 10, label = "Relative\nHäufigkeit\n(in %)", size = 3) +
  annotate(geom = "text", x = 2009, y = 29, label = "Absolute\nHäufigkeit", size = 3)

# Labels at y = 10 / 30
ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(
    aes(x = year, y = (freq / total) * 100, group = 1),
    linetype = "dotted"
  ) +
  labs("", x = "", y = "") +
  annotate(geom = "text", x = 2009, y = 10, label = "Relative\nHäufigkeit\n(in %)", size = 3) +
  annotate(geom = "text", x = 2009, y = 30, label = "Absolute\nHäufigkeit", size = 3)

# Labels at y = 12 / 28
ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(
    aes(x = year, y = (freq / total) * 100, group = 1),
    linetype = "dotted"
  ) +
  labs("", x = "", y = "") +
  annotate(geom = "text", x = 2009, y = 12, label = "Relative\nHäufigkeit\n(in %)", size = 3) +
  annotate(geom = "text", x = 2009, y = 28, label = "Absolute\nHäufigkeit", size = 3)

# Labels at y = 11 / 28
ggplot(tmp1) +
  theme_bw() +
  geom_line(aes(x = year, y = freq, group = 1)) +
  geom_line(
    aes(x = year, y = (freq / total) * 100, group = 1),
    linetype = "dotted"
  ) +
  labs("", x = "", y = "") +
  annotate(geom = "text", x = 2009, y = 11, label = "Relative\nHäufigkeit\n(in %)", size = 3) +
  annotate(geom = "text", x = 2009, y = 28, label = "Absolute\nHäufigkeit", size = 3)
# Make the built-in mtcars data set available and open its help page.
# load() restores objects from a saved file given by path, so load(mtcars)
# fails; data() is the call for data sets shipped with packages.
data(mtcars)
?mtcars
